# First argument is the job id, second argument is the number of input
# files, third argument is the number of files per map task.
# This program will create mapflags.xml, reduceflags.xml, and jobconf.xml.
# 1. All input files should be in the current directory and match the pattern
#    [Job_id]M[0->onwards].in
# 2. mapflags.xml will be created with a single struct tag.
# 3. jobconf.xml will be copied from testjobconf.xml.
# 4. All folders will be created accordingly on HDFS.
# 5. All old files will be deleted and remade.
# 6. This script assumes that mapflags.xml, reduceflags.xml and
#    jobconf.xml lie in the same directory.

import os
import os.path
import shutil
import sys

import ma.const
import ma.fs.dfs._hdfs
import ma.utils.location

starting_file_no = 0
python_exec = ma.const.XmlData.get_filepath_str_data(ma.const.xml_python_exec)
src_path = ma.utils.location.get_src_ma_path()
scripts_path = ma.utils.location.get_scripts_path()

if len(sys.argv) != 4:
    print('usage: python setupjob.py [job_id] [no_input_files] [files_per_map]')
    sys.exit(-1)

# get argument variables    
job_id = int(sys.argv[1])
no_input_files = int(sys.argv[2])
files_per_map = int(sys.argv[3])

# delete mapflags if it already exists
mf_filename = ma.const.XmlData.get_str_data(ma.const.xml_map_flags_filename)
if os.path.isfile(mf_filename):
    print('Removing old map flags file')
    os.remove(mf_filename)
    
# delete reduceflags if it already exists
rf_filename = ma.const.XmlData.get_str_data(ma.const.xml_reduce_flags_filename)
if os.path.isfile(rf_filename):
    print('Removing old reduce flags file')
    os.remove(rf_filename)
    
# delete jobconf if it already exists
jc_filename = ma.const.XmlData.get_str_data(ma.const.xml_job_conf_filename)
if os.path.isfile(jc_filename):
    print('Removing old job conf file')
    os.remove(jc_filename)
    
# create new map flags
print('Creating new map flags file')
os.system(python_exec + ' ' + os.path.join(scripts_path, 'mfcreator.py') + ' ' + str(job_id) + ' ' + str(starting_file_no) + ' ' + str(no_input_files) + ' ' + str(files_per_map))

# copying standard reduce flags file
print('Copying standard reduce flags file')
os.system('cp ' + os.path.join(src_path, 'conf/' + rf_filename) + ' .')

# copying standard job conf file
print('Copying standard job conf file')
os.system('cp ' + os.path.join(src_path, 'conf/testjobconf.xml') + ' ' + jc_filename)

# Connect to HDFS
print('Connecting to HDFS')
host = ma.const.XmlData.get_str_data(ma.const.xml_hdfs_host)
port = ma.const.XmlData.get_int_data(ma.const.xml_hdfs_port)
hdfs  = ma.fs.dfs._hdfs.HDFS(host, port)

# (re)creating the main jobs path
print('(re)creating the main jobs directory')
mf_dfs_filepath = ma.const.JobsXmlData.get_dfs_filepath_str_data(ma.const.xml_dfs_path_job_map_flags, job_id)
main_path = os.path.dirname(mf_dfs_filepath)
hdfs.delete_dir(main_path, True)
hdfs.create_dir(main_path)

# updating old map flag file
print('Updating the map flag file')
hdfs.copy_file_from_local(mf_filename, main_path, job_id)

# updating old reduce flag file
print('Updating the reduce flag file')
rf_dfs_filepath = ma.const.JobsXmlData.get_dfs_filepath_str_data(ma.const.xml_dfs_path_job_reduce_flags, job_id)
hdfs.copy_file_from_local(rf_filename, main_path, job_id)

# updating old job_conf file
print('Updating the job conf file')
jc_dfs_filepath = ma.const.JobsXmlData.get_dfs_filepath_str_data(ma.const.xml_dfs_path_job_conf, job_id)
hdfs.copy_file_from_local(jc_filename, main_path, job_id)

# creating new dirs for input output on HDFS
dfs_inputpath = ma.const.JobsXmlData.get_dfs_filepath_str_data(ma.const.xml_dfs_path_job_input, job_id)
dfs_outputpath = ma.const.JobsXmlData.get_dfs_filepath_str_data(ma.const.xml_dfs_path_job_output, job_id)
print('(re)creating the jobs input directory')
hdfs.delete_dir(dfs_inputpath, True)
hdfs.create_dir(dfs_inputpath)

print('(re)creating the jobs output directory')
hdfs.delete_dir(dfs_outputpath, True)
hdfs.create_dir(dfs_outputpath)

# copying input
print('Copying input')
for file_no in range(starting_file_no, starting_file_no + no_input_files):
    filename = ma.const.JobsXmlData.get_str_data(ma.const.xml_map_input_filename, job_id, file_no)
    print('copying', filename, 'to DFS')
    ret = hdfs.copy_file_from_local(filename, dfs_inputpath, job_id)
    if ret == False:
        sys.exit()
